#
# Copyright (c) 2021, 2023 supercell
#
# SPDX-License-Identifier: BSD-3-Clause
#
require "uri"

require "../extern/dart_uri"
require "./assets/html_entities"

module Luce
  # Matches a run of one or more whitespace characters (space, line feed,
  # carriage return or tab). Used to collapse whitespace in link labels.
  @@one_or_more_whitespace_pattern = Regex.new("[ \n\r\t]+")

  # :nodoc:
  #
  # Inserts every element of *other* into *array* starting at *index*,
  # shifting the elements previously at or after *index* towards the end.
  protected def self.array_insert_all(array : Array, index : Int, other : Indexable)
    {% if compare_versions(Crystal::VERSION, "1.8.0") >= 0 %}
      array[index, 0] = other
    {% else %}
      # Crystal versions below 1.8.0 had a bug on range assignment
      # https://github.com/crystal-lang/crystal/pull/13275
      return array.concat(other) if index == array.size

      original_size = array.size
      array.concat(other)
      copy = array[index...original_size]
      array[-copy.size..] = copy
      array[index...index + other.size] = other
    {% end %}
  end

  # Escapes (`"`), (`<`), (`>`) and (`&`) characters.
  # Escapes (`'`) if *escape_apos* is `true`.
  protected def self.escape_html(html : String, escape_apos : Bool = true) : String
    escape_html_impl(
      html,
      HtmlEscapeMode.new(
        escape_apos: escape_apos,
        escape_lt_gt: true,
        escape_quot: true
      )
    )
  end

  # Escapes (`"`), (`<`), (`>`) and (`&`) characters, leaving (`'`) untouched.
  protected def self.escape_html_attribute(text : String) : String
    escape_html_impl(text, HtmlEscapeMode::ATTRIBUTE)
  end

  # :nodoc:
  #
  # Describes which characters `Luce.escape_html_impl` replaces. The
  # ampersand is always escaped; every other character is opt-in.
  struct HtmlEscapeMode
    # Escapes `&`, `<` and `>`.
    ELEMENT = HtmlEscapeMode.new("element", true)
    # Escapes `&`, `<`, `>` and `"`.
    ATTRIBUTE = HtmlEscapeMode.new("attribute", true, true)

    getter name : String
    getter? escape_lt_gt : Bool
    getter? escape_quot : Bool
    getter? escape_apos : Bool
    getter? escape_slash : Bool

    def initialize(@name = "custom", @escape_lt_gt = false, @escape_quot = false,
                   @escape_apos = false, @escape_slash = false)
    end

    # Returns the name of this mode.
    def to_s : String
      @name
    end

    # Appends the name of this mode to *io*, so that string interpolation and
    # `IO#<<` print the same text as `#to_s`.
    def to_s(io : IO) : Nil
      io << @name
    end
  end

  # :nodoc:
  #
  # Returns a copy of *html* in which `&` and every character enabled by
  # *mode* is replaced with its HTML character reference.
  protected def self.escape_html_impl(html : String, mode : HtmlEscapeMode) : String
    String.build do |builder|
      html.each_char do |char|
        # `nil` means "emit the character unchanged".
        replacement =
          case char
          when '&'
            "&amp;"
          when '"'
            "&quot;" if mode.escape_quot?
          when '\''
            "&#39;" if mode.escape_apos?
          when '<'
            "&lt;" if mode.escape_lt_gt?
          when '>'
            "&gt;" if mode.escape_lt_gt?
          when '/'
            # A numeric character reference must end with a semicolon.
            "&#47;" if mode.escape_slash?
          end

        builder << (replacement || char)
      end
    end
  end

  # Normalizes a link destination, including the process of HTML characters
  # decoding and percent encoding.
  protected def self.normalize_link_destination(destination : String) : String
    # See the description of these examples:
    # https://spec.commonmark.org/0.30/#example-501
    # https://spec.commonmark.org/0.30/#example-502

    # Existing percent-escapes are passed through untouched; only the text
    # between them is decoded and re-encoded.
    # Refer: https://spec.commonmark.org/0.30/#example-502
    Luce.string_split_map_join(destination, /%[0-9A-Fa-f]{2}/,
      on_match: ->(m : Regex::MatchData) { m[0] },
      on_non_match: ->(e : String) {
        begin
          e = URI.decode(e)
        rescue
          # Deliberate best effort: if decoding fails, the fragment is used
          # as it was written.
        end
        DartURI.encode_full(decode_html_characters(e))
      })
  end

  # Normalizes a link title, including the process of HTML characters decoding
  # and HTML characters escaping.
  protected def self.normalize_link_title(title : String) : String
    # See the description of these examples:
    # https://spec.commonmark.org/0.30/#example-505
    # https://spec.commonmark.org/0.30/#example-506
    # https://spec.commonmark.org/0.30/#example-507
    # https://spec.commonmark.org/0.30/#example-508
    escape_html_attribute(decode_html_characters(title))
  end

  # "Normalizes" a link label, according to the [CommonMark spec]: surrounding
  # whitespace is stripped, internal whitespace runs are collapsed to a single
  # space and the result is Unicode case-folded.
  #
  # [CommonMark spec]: https://spec.commonmark.org/0.30/#link-label
  protected def self.normalize_link_label(label : String) : String
    label.strip.gsub(@@one_or_more_whitespace_pattern, " ").downcase(Unicode::CaseOptions::Fold)
  end

  # Decodes HTML entity and numeric character references, for example decode
  # `&#35;` to `#`.
  protected def self.decode_html_characters(input : String) : String
    input.gsub(Luce.html_characters_pattern) { |_, match| Luce.decode_html_characters_from_match(match) }
  end

  # Decodes HTML entity and numeric character references from the given *match*.
  #
  # Capture group 1 is read as an entity name, group 2 as a decimal number and
  # group 3 as a hexadecimal number. When none of them matched, or the entity
  # is unknown, the matched text is returned unchanged.
  protected def self.decode_html_characters_from_match(match : Regex::MatchData) : String
    text = match[0]

    # Entity references, see
    # https://spec.commonmark.org/0.30/#entity-references.
    return (html_entities_map[text]? || text) unless match[1]?.nil?

    # Decimal numeric character references, see
    # https://spec.commonmark.org/0.30/#decimal-numeric-character-references.
    if decimal_number = match[2]?
      return numeric_reference_to_string(decimal_number.to_i32)
    end

    # Hexadecimal numeric character references, see
    # https://spec.commonmark.org/0.30/#hexadecimal-numeric-character-references.
    if hexadecimal_number = match[3]?
      return numeric_reference_to_string(hexadecimal_number.to_i32(16))
    end

    text
  end

  # :nodoc:
  #
  # Converts the value of a numeric character reference to a one-character
  # string. U+0000 and values that are not valid code points (above U+10FFFF
  # or inside the surrogate range, for which `Int#chr` would raise) become
  # the REPLACEMENT CHARACTER (U+FFFD).
  protected def self.numeric_reference_to_string(codepoint : Int32) : String
    surrogate = 0xD800 <= codepoint <= 0xDFFF
    valid = codepoint > 0 && codepoint <= 0x10FFFF && !surrogate
    (valid ? codepoint : 0xFFFD).chr.to_s
  end

  # Escapes the ASCII punctuation characters after backslash(`\`).
  #
  # A backslash that is directly followed by an ASCII punctuation character is
  # dropped; every other character is copied through.
  protected def self.escape_punctuation(input : String) : String
    # Index into an array of characters: indexing the string itself by
    # character position is linear per access for non-ASCII text.
    chars = input.chars

    String.build do |buffer|
      i = 0
      while i < chars.size
        if chars[i].ord == Charcode::BACKSLASH
          following = chars[i + 1]?
          # Skip the backslash so only the punctuation character is emitted.
          i += 1 if following && ascii_punctuation_characters.includes?(following)
        end

        buffer << chars[i]
        i += 1
      end
    end
  end

  # A class that describes a dedented text
  class DedentedText
    # The dedented text
    getter text : String

    # How many spaces of tab that remain after part of it has been consumed.
    #
    # `nil` means that we did not read a `tab`.
    getter tab_remaining : Int32?

    def initialize(@text : String, @tab_remaining : Int32?)
    end
  end

  # Removes up to *number_of_whitespace_characters* characters of leading whitespace.
  #
  # A tab counts as four columns. When a tab is wider than the indentation
  # still to be removed, the surplus columns are reported through
  # `DedentedText#tab_remaining`.
  protected def self.dedent_string(string : String, number_of_whitespace_characters : Int32 = 4) : DedentedText
    # The way of handling tabs: https://spec.commonmark.org/0.30/#tabs
    whitespaces = Regex.new("^[ \t]{0,#{number_of_whitespace_characters}}").match(string).try &.[0]?
    tab_size = 4

    tab_remaining : Int32? = nil
    start = 0
    unless whitespaces.nil?
      indent_length = 0
      while start < whitespaces.size
        is_tab = whitespaces[start] == '\t'
        if is_tab
          indent_length += tab_size
          tab_remaining = tab_size
        else
          indent_length += 1
        end

        if indent_length >= number_of_whitespace_characters
          # Only report leftover columns when a tab has been read.
          unless tab_remaining.nil?
            tab_remaining = indent_length - number_of_whitespace_characters
          end
          # Consume the current character when it lands exactly on the
          # requested width, or when it is a tab that overshoots it.
          start += 1 if indent_length == number_of_whitespace_characters || is_tab
          break
        end

        # A tab that is entirely consumed leaves nothing behind.
        tab_remaining = 0 unless tab_remaining.nil?
        start += 1
      end
    end

    Luce::DedentedText.new(string[start..], tab_remaining)
  end

  # Calculates the length of indentation *string* has.
  #
  # Spaces count as one column; a tab advances to the next multiple of four.
  #
  # The behaviour of tabs: https://spec.commonmark.org/0.30/#tabs
  protected def self.string_indentation(string : String) : Int32
    length = 0
    string.each_codepoint do |char|
      break if char != Luce::Charcode::SPACE && char != Luce::Charcode::TAB
      length += char == Luce::Charcode::TAB ? 4 - (length % 4) : 1
    end
    length
  end

  # Splits *string*, converts its parts, and combines them into a new String.
  #
  # The *pattern* is used to split the *string* into parts and separating matches.
  # Each match of `String#scan` of *pattern* on *string* is used as a match, and
  # the substrings between the end of one match (or the start of *string*) and the
  # start of the next match (or the end of *string*) is treated as a non-matched
  # part.
  #
  # Matches are converted with *on_match* and the other parts with
  # *on_non_match*; an omitted callback leaves its parts unchanged.
  protected def self.string_split_map_join(string : String, pattern : Regex,
                                           on_match : Proc(Regex::MatchData, String)? = nil,
                                           on_non_match : Proc(String, String)? = nil) : String
    convert_match = on_match || ->(match : Regex::MatchData) { match[0] }
    convert_other = on_non_match || ->(non_match : String) { non_match }

    String.build do |builder|
      start_index = 0
      string.scan(pattern).each do |match|
        builder << convert_other.call(string[start_index...match.begin])
        builder << convert_match.call(match)
        start_index = match.end
      end
      # Whatever follows the last match, or the whole string without a match.
      builder << convert_other.call(string[start_index..])
    end
  end
end

class Array(T) < Reference
  # Inserts every element of *iterable* into this array, starting at
  # position *index*.
  #
  # The array grows by the size of *iterable*; elements that were at or after
  # *index* are moved towards the end.
  #
  # Raises `IndexError` unless *index* is between `0` and `size` (inclusive).
  #
  # ```
  # arr = [1, 2, 3, 7]
  #
  # arr.insert_all(4, [8, 9])
  # puts arr # => [1, 2, 3, 7, 8, 9]
  #
  # arr.insert_all(3, [4, 5, 6])
  # puts arr # => [1, 2, 3, 4, 5, 6, 7, 8, 9]
  # ```
  {% if Array.has_method? :insert_all %}
    @[Deprecated("Luce is removing its custom extensions. Use Array#insert_all(Int, Indexable) instead. Will be removed with Luce v1.0")]
  {% else %}
    @[Deprecated("Luce is removing its custom extensions. Use Array range assignment instead. Will be removed with Luce v1.0")]
  {% end %}
  def insert_all(index : Int32, iterable : Iterable(T)) : self
    raise IndexError.new unless 0 <= index <= size

    size_before = size

    # Inserting at the very end is plain concatenation.
    if index == size_before
      concat(iterable)
      return self
    end

    # An empty source leaves the array untouched.
    return self if iterable.size == 0

    # Grow the array first; the appended values are overwritten below.
    concat(iterable)

    # Move the original tail (everything from *index* up to the old end)
    # into the last slots of the enlarged array.
    displaced = self[index...size_before]
    self[-displaced.size..] = displaced

    # Write the new elements into the gap that the tail left behind.
    self[index, iterable.size] = iterable

    self
  end
end

class String < Reference
  # Returns a copy of this string in which every non-empty match of *pattern*
  # is replaced by the value *replace* computes for it.
  #
  # The matches are the ones produced by `pattern.all_matches(self)`. They are
  # substituted from the last one to the first so that the character
  # positions of the matches still to be processed stay valid.
  @[Deprecated("Luce is removing its custom extensions. Use `String#gsub(Regex, &)` instead. Will be removed with Luce v1.0")]
  def replace_all_mapped(pattern : Regex, replace : Proc(Regex::MatchData, String)) : String
    pattern.all_matches(self).reverse!.reduce(self) do |current, found|
      if found[0].size == 0
        # Empty matches are left alone.
        current
      else
        current.sub(found.begin...found.end, replace.call(found))
      end
    end
  end

  # Splits this string on *pattern*, converts the pieces, and joins them again.
  #
  # Matches are converted with *on_match*, the text between matches with
  # *on_non_match*. An omitted callback leaves its pieces unchanged.
  @[Deprecated("Luce is removing its custom extensions. Use `Luce.string_split_map_join` instead. Will be removed with Luce v1.0")]
  def split_map_join(pattern : Regex,
                     on_match : Proc(Regex::MatchData, String)? = nil,
                     on_non_match : Proc(String, String)? = nil) : String
    convert_match = on_match || ->(found : Regex::MatchData) { found[0] }
    convert_other = on_non_match || ->(piece : String) { piece }

    String.build do |out|
      cursor = 0
      scan(pattern).each do |found|
        out << convert_other.call(self[cursor...found.begin])
        out << convert_match.call(found)
        cursor = found.end
      end
      # Trailing piece after the last match (the whole string if none matched).
      out << convert_other.call(self[cursor..])
    end
  end

  # Returns the width of this string's leading indentation.
  #
  # A space counts as one column and a tab advances to the next multiple of
  # four; counting stops at the first other character.
  #
  # The behaviour of tabs: https://spec.commonmark.org/0.30/#tabs
  @[Deprecated("Luce is removing its custom extensions. Use `Luce.string_indentation(String)` instead. Will be removed with Luce v1.0")]
  def indentation : Int32
    width = 0
    each_codepoint do |codepoint|
      if codepoint == Luce::Charcode::TAB
        width += 4 - (width % 4)
      elsif codepoint == Luce::Charcode::SPACE
        width += 1
      else
        break
      end
    end
    width
  end

  # Strips up to *length* columns of leading whitespace.
  #
  # A tab counts as four columns; when a tab is wider than what is left to
  # strip, the surplus is reported as `Luce::DedentedText#tab_remaining`.
  @[Deprecated("Luce is removing its custom extensions. Use Luce.dedent_string(String, Int32) instead. Will be removed with Luce v1.0")]
  def dedent(length : Int32 = 4) : Luce::DedentedText
    # The way of handling tabs: https://spec.commonmark.org/0.30/#tabs
    leading = Regex.new("^[ \t]{0,#{length}}").match(self).try &.[0]?

    consumed = 0
    width = 0
    leftover : Int32? = nil

    if leading
      while consumed < leading.size
        tab = leading[consumed] == '\t'
        if tab
          width += 4
          leftover = 4
        else
          width += 1
        end

        if width >= length
          # Leftover columns are only tracked once a tab has been seen.
          leftover = width - length unless leftover.nil?
          consumed += 1 if width == length || tab
          break
        end

        leftover = 0 unless leftover.nil?
        consumed += 1
      end
    end

    Luce::DedentedText.new(self[consumed..], leftover)
  end

  # Returns this string preceded by *width* spaces.
  @[Deprecated("Luce is removing its custom extensions. Use `String#insert(0, \" \" * width)` instead. Will be removed with Luce v1.0")]
  def prepend_space(width : Int32) : String
    padding = " " * width
    padding + self
  end

  # Wraps each line of this string in a `Luce::Line`.
  @[Deprecated("Luce is removing its custom extensions. Use `String#lines.map { |line| Luce::Line.new(line) } instead. Will be removed with Luce v1.0")]
  def to_lines : Array(Luce::Line)
    lines.map { |text| Luce::Line.new(text) }
  end

  # Returns the substring that starts *n* characters before the end.
  @[Deprecated("Luce is removing its custom extensions. Use String#[-1] (default behaviour) or String#[String#size - n..] instead. Will be removed with Luce v1.0")]
  def last(n : Int32 = 1)
    from = size - n
    self[from..]
  end
end

class Regex < Reference
  # Matches this pattern against the string repeatedly.
  #
  # If *start* is provided, matching will start at that index.
  #
  # The returned array contains non-overlapping matches of the pattern in the
  # *string*.
  #
  # The matches are found by repeatedly finding the first match of the pattern
  # in the string, initially starting from *start*, and then from the end of
  # the previous match (but always at least one position later than the start
  # of the previous match, in case the pattern matches an empty substring).
  #
  # Raises `IndexError` if *start* is negative or greater than the size of
  # *string*.
  #
  # ```
  # exp = Regex.new(%q{(\w+)})
  # str = "Dash is a bird"
  # matches = exp.all_matches(str, 8)
  # matches.each do |m|
  #   match = m[0].not_nil!
  #   puts match
  # end
  #
  # # => a
  # # => bird
  # ```
  @[Deprecated("Luce is removing its custom extensions. Try `String#match(regex, pos)` and increment 'pos'. Will be removed with Luce v1.0")]
  def all_matches(string : String, start : Int32 = 0) : Array(Regex::MatchData)
    raise IndexError.new("*start* is less than 0") if start < 0
    raise IndexError.new("*start* is greater than *string* size") if start > string.size

    matches = [] of Regex::MatchData
    offset = start

    while offset <= string.size
      found = self.match(string, offset)
      break if found.nil?
      matches << found

      # Resume after the match. An empty match may lie beyond *offset*, so
      # the step past it has to be taken from the match position; stepping
      # from *offset* would find the same empty match again.
      offset = found.end == found.begin ? found.end + 1 : found.end
    end

    matches
  end
end

struct Regex::MatchData
  # Returns the text matched by the whole pattern (capture group `0`).
  @[Deprecated("Luce is removing its custom extensions. Use `Regex::MatchData[0]` instead. Will be removed with Luce v1.0")]
  def match : String
    # Group 0 is always present, and `#[]` already returns a non-nil String.
    self[0]
  end
end
